library("quanteda")
library("topicmodels")
library("readtext")
library(readr)
library(tidytext)
library(dplyr)
library(ggplot2)

# Path to the input CSV. The file is expected to contain a single column that
# holds the texts to analyse.
# TODO: point this at the real data file before running the script.
data_path <- "path/to/data.csv"
d_data <- read_csv(data_path)
# corpus() needs a column called "text"; rename the first column by position
# instead of relying on it having been auto-named "X1".
names(d_data)[1L] <- "text"
d_corpus <- corpus(d_data) # turn the data frame into a quanteda corpus
d_corpus <- tolower(d_corpus) # lower-case every document
d_tokens <- tokens(d_corpus) # split each document into separate word tokens

# Extend the English stopword list with terms specific to this data set:
# emoji, URL / mention / hashtag globs, weekday and month names, number words
# and other frequent filler words.
new_stopwords <- c(
  "👏", "👇", "🔵", "🌹", "✅", "🇬🇧", "💙", "✔", "👉", "🚨", "❌",
  "🇬🇧", "🌳", "🗳", "👍", "🗣", "📢", "📆", "⬇", "⤵",
  "minut*", "hour*", "amp", "http*", "@*", "#*", "even", "will",
  "monday", "tuesday", "wednesday", "thursday", "friday", "saturday",
  "sunday", "extra", "use", "rail", "today", "yesterday", "tomorrow", "now",
  "full", "bring", "big", "say", "meet", "across", "add", "step", "s", "t",
  "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
  "ten", "summer", "winter", "spring", "autumn", "ever", "pm", "am", "rt",
  "back", "like", "still", "see", "end", "first", "many", "come", "yet",
  "real", "find", "sure", "never", "january", "february", "march", "april",
  "may", "june", "july", "august", "september", "october", "november",
  "december", "day*", "week*", "month*", "always", "since", "become", "keep",
  "let", "can", "ahead", "new", "hard", "made", "ago", "just", "still",
  stopwords("english")
)

# Build the document-feature matrix without punctuation, numbers and
# stopwords, then stem the remaining features. The steps are spelled out
# explicitly because the `remove` and `stem` arguments of dfm() (and calling
# dfm() directly on a corpus) are deprecated in newer quanteda releases.
d_dfm <- dfm(tokens(d_corpus, remove_punct = TRUE, remove_numbers = TRUE))
d_dfm <- dfm_remove(d_dfm, pattern = new_stopwords)
d_dfm <- dfm_wordstem(d_dfm)

# Dictionary with four categories (Experience, Agency, Patient_emotion,
# Agent_emotion). Entries are glob patterns; a trailing "*" matches any ending.
mind_perception_dictionary <- dictionary(list(
  Experience = c(
    "Feel*", "Perceive*", "Perception", "Mood", "Pain", "Ache*", "Agony",
    "Discomfort", "Injur*", "Wound*", "Miser*", "Distress*", "Anguish*",
    "Anxiety", "Anxious", "Grief", "Sad*", "Devastated", "Suffer*", "Hurt*",
    "Emotion*", "Anger", "Angry", "Surprise*", "Disgust*", "Pleasure",
    "Amuse*", "Enjoy*", "Gratif*",
    "Passion*", # was misspelled "Passon*", which cannot match any token
    "Experience*", "Hunger", "Hungry", "Starved", "Rage", "Enrage*", "Bitter",
    "Irritated", "Mad", "Obsess*", "Upset", "Aggressive", "Agitated",
    "Desire*", "Ambition", "Appetite", "Aspir*", "Crav*", "Devot*", "Eager*",
    "Fascinat*", "Passion", "Greed*", "Lust*", "Inclination", "Admir*",
    "Affection", "Pride*", "Digni*", "Honor", "Self-esteem", "Temper",
    "Nostalgic", "Embarrass*", "Shame", "Ashamed", "Humble", "Distress*",
    "Humiliated", "Joy*", "Happy", "Delight*", "Elate*", "Satisfied",
    "Satisfaction", "Glad", "Content", "Pleased", "Interested", "Absorbed",
    "Engrossed", "Keen", "Attracted", "Enticed", "Excited", "Thrilled",
    "Guilty", "Remorse*", "Sorry", "Regret*", "Scared", "Panic*", "Alarmed",
    "Horrified", "Terrified", "Worried", "Trembling", "Hostile", "Advers*",
    "Belligerent", "Nasty", "Hateful", "Unfriendly", "Argumentative", "Catty",
    "Unkind", "Enthusiastic", "Avid", "Earnest", "Proud", "Appreciative",
    "Illustrious", "Irritable", "Contentious", "Exasperated", "Annoyed",
    "Frustrated", "Grouchy", "Testy", "Brooding", "Moody", "Alert",
    "Observant", "Perceptive", "Wise", "Clever", "Inspired", "Motivated",
    "Motivation", "Jittery", "Uptight", "Antsy", "Apprehensive", "Afraid",
    "Suspicious", "Shocked", "Cheerful", "Jolly", "Joyful", "Jovial",
    "Optimist*", "Bashful", "Coy", "Self-conscious", "Reticent", "Blush*",
    "Modest", "Abashed", "Sluggish", "Apathetic", "Daring", "Audacious",
    "Courageous", "Strong", "Resilient", "Tenacious", "Dedicated", "Scornful",
    "Contemptuous", "Disdain*", "Sneering", "Cynical", "Arrogant",
    "Egotistic*", "Snooty", "Relaxed", "Serene", "Tranquil", "Poised",
    "Patien*", "Fear*", "Dread", "Calm", "Tired",
    "Amazed", "Astounded", "Perplexed*", "Dumbfounded", "Dazed", "Bewildered",
    "Timid", "Alert", "Bold", "Unafraid", "Brave", "Shy", "Nervous", "Lonely",
    "Lonesome", "Sleepy", "Lively", "Peppy", "At-ease", "Drowsy",
    "Downhearted", "Dejected", "Sorrow*", "Dismay*", "Dishearten*",
    "Sheepish", "Foolish", "Mortified", "Frightened", "Astonished",
    "Loathing", "Hatred", "Detest*", "Confident", "Hope*", "Energetic",
    "Inspiration", "Elevation", "Exalted", "Awe", "Dazzl*", "Relie*",
    "Comfortable", "Solace", "Sympath*", "Compassion", "Empath*", "Concern*",
    "Care*", "Caring"
  ),
  Agency = c(
    "Purpose", "Plan*", "Prepare*", "Organize*", "Calculate*", "Concoct*",
    "Conspire*", "Contemplat*", "Devise*", "Formulate*", "Decisive",
    "Decide*", "Goal*", "Conscious*", "Attentive", "Aware*", "Cognizant",
    "Responsive", "Rational", "Responsible", "Deliberate", "Memory",
    "Memorize*", "Recall*", "Recollect*", "Forget*", "Forgot*", "Remembers",
    "Remembered", "Thought*", "Considerate", "Determined", "Judge*",
    "Realize*", "Understand*", "Conclude*", "Foresee*", "Comprehend*",
    "Think*", "Reason*", "Believe*", "Mental*", "Cerebral", "Intellect*",
    "Brain*", "Agen*", "Control", "Competen*", "Intelligen*", "Self-control",
    "Discreet", "Discretion", "Restrain*", "Discipline*", "Moral*",
    "Benevolent", "Noble", "Principled", "Righteous", "Praiseworthy", "Fair",
    "Ethical", "Malevolent", "Unfair", "Corrupt", "Evil", "Unethical",
    "Impure", "Blameworthy", "Immoral", "Abus*", "Recogni*", "Accept*",
    "Communicat*", "Persuasive", "Free-will", "Prefer*", "Mind*", "Inten*",
    "Concentrating", "Focused", "Value", "Appreciate", "Likes", "Dislikes",
    "Liking", "Disliking", "Impressed", "Unimpressed", "Love*", "Approve*",
    "Adore*", "Imagin*", "Envision*", "Envisage", "Visualiz*", "Conceive*",
    "Predict*", "Infer*", "Attitude*", "Notion", "Opinion", "Perspective",
    "Sentiment", "Mind*"
  ),
  Patient_emotion = c(
    "😱", "🥺", "😭", "😢", "☺️", "😂", "🥰", "😀", "😃", "😄", "😁", "😆",
    "🤣", "😊", "Relie*", "Comfortable", "Solace", "Sympath*", "Compassion",
    "Empath*", "Concern*", "Care*", "Caring"
  ),
  Agent_emotion = c(
    "🤬", "😡", "😠", "😤", "🤭", "👊", "💪", "✊", "😲", "Loathing",
    "Hatred", "Detest*", "Inspiration", "Elevation",
    "Exalted", # was misspelled "Ealted"
    "Awe", "dazzl*"
  )
))

# Replace every token by the dictionary category it matches (tokens without a
# match are dropped) and tabulate the categories per document. Stopword and
# punctuation removal is not needed here because only the four category keys
# remain after the lookup. tolower = FALSE keeps the capitalised key names as
# column names, which the sums below rely on.
tokens_results <- dfm(
  tokens_lookup(d_tokens, dictionary = mind_perception_dictionary),
  tolower = FALSE
)
counts <- convert(tokens_results, to = "data.frame") # export counts
tokens_results
sum(counts$Experience) # total hits per category over all documents
sum(counts$Agency)
sum(counts$Patient_emotion)
sum(counts$Agent_emotion)



# Word lists (glob patterns) for the four categories, kept as plain character
# vectors so that each entry can later become its own dictionary key.
Experience_dictionary_words <- c(
  "Feel*", "Perceive*", "Perception", "Mood", "Pain", "Ache*", "Agony",
  "Discomfort", "Injur*", "Wound*", "Miser*", "Distress*", "Anguish*",
  "Anxiety", "Anxious", "Grief", "Sad*", "Devastated", "Suffer*", "Hurt*",
  "Emotion*", "Anger", "Angry", "Surprise*", "Disgust*", "Pleasure",
  "Amuse*", "Enjoy*", "Gratif*",
  "Passion*", # was misspelled "Passon*", which cannot match any token
  "Experience*", "Hunger", "Hungry", "Starved", "Rage", "Enrage*", "Bitter",
  "Irritated", "Mad", "Obsess*", "Upset", "Aggressive", "Agitated",
  "Desire*", "Ambition", "Appetite", "Aspir*", "Crav*", "Devot*", "Eager*",
  "Fascinat*", "Passion", "Greed*", "Lust*", "Inclination", "Admir*",
  "Affection", "Pride*", "Digni*", "Honor", "Self-esteem", "Temper",
  "Nostalgic", "Embarrass*", "Shame", "Ashamed", "Humble", "Distress*",
  "Humiliated", "Joy*", "Happy", "Delight*", "Elate*", "Satisfied",
  "Satisfaction", "Glad", "Content", "Pleased", "Interested", "Absorbed",
  "Engrossed", "Keen", "Attracted", "Enticed", "Excited", "Thrilled",
  "Guilty", "Remorse*", "Sorry", "Regret*", "Scared", "Panic*", "Alarmed",
  "Horrified", "Terrified", "Worried", "Trembling", "Hostile", "Advers*",
  "Belligerent", "Nasty", "Hateful", "Unfriendly", "Argumentative", "Catty",
  "Unkind", "Enthusiastic", "Avid", "Earnest", "Proud", "Appreciative",
  "Illustrious", "Irritable", "Contentious", "Exasperated", "Annoyed",
  "Frustrated", "Grouchy", "Testy", "Brooding", "Moody", "Alert",
  "Observant", "Perceptive", "Wise", "Clever", "Inspired", "Motivated",
  "Motivation", "Jittery", "Uptight", "Antsy", "Apprehensive", "Afraid",
  "Suspicious", "Shocked", "Cheerful", "Jolly", "Joyful", "Jovial",
  "Optimist*", "Bashful", "Coy", "Self-conscious", "Reticent", "Blush*",
  "Modest", "Abashed", "Sluggish", "Apathetic", "Daring", "Audacious",
  "Courageous", "Strong", "Resilient", "Tenacious", "Dedicated", "Scornful",
  "Contemptuous", "Disdain*", "Sneering", "Cynical", "Arrogant",
  "Egotistic*", "Snooty", "Relaxed", "Serene", "Tranquil", "Poised",
  "Patien*", "Fear*", "Dread", "Calm", "Tired",
  "Amazed", "Astounded", "Perplexed*", "Dumbfounded", "Dazed", "Bewildered",
  "Timid", "Alert", "Bold", "Unafraid", "Brave", "Shy", "Nervous", "Lonely",
  "Lonesome", "Sleepy", "Lively", "Peppy", "At-ease", "Drowsy",
  "Downhearted", "Dejected", "Sorrow*", "Dismay*", "Dishearten*",
  "Sheepish", "Foolish", "Mortified", "Frightened", "Astonished",
  "Loathing", "Hatred", "Detest*", "Confident", "Hope*", "Energetic",
  "Inspiration", "Elevation", "Exalted", "Awe", "Dazzl*", "Relie*",
  "Comfortable", "Solace", "Sympath*", "Compassion", "Empath*", "Concern*",
  "Care*", "Caring"
)
Agency_dictionary_words <- c(
  "Purpose", "Plan*", "Prepare*", "Organize*", "Calculate*", "Concoct*",
  "Conspire*", "Contemplat*", "Devise*", "Formulate*", "Decisive",
  "Decide*", "Goal*", "Conscious*", "Attentive", "Aware*", "Cognizant",
  "Responsive", "Rational", "Responsible", "Deliberate", "Memory",
  "Memorize*", "Recall*", "Recollect*", "Forget*", "Forgot*", "Remembers",
  "Remembered", "Thought*", "Considerate", "Determined", "Judge*",
  "Realize*", "Understand*", "Conclude*", "Foresee*", "Comprehend*",
  "Think*", "Reason*", "Believe*", "Mental*", "Cerebral", "Intellect*",
  "Brain*", "Agen*", "Control", "Competen*", "Intelligen*", "Self-control",
  "Discreet", "Discretion", "Restrain*", "Discipline*", "Moral*",
  "Benevolent", "Noble", "Principled", "Righteous", "Praiseworthy", "Fair",
  "Ethical", "Malevolent", "Unfair", "Corrupt", "Evil", "Unethical",
  "Impure", "Blameworthy", "Immoral", "Abus*", "Recogni*", "Accept*",
  "Communicat*", "Persuasive", "Free-will", "Prefer*", "Mind*", "Inten*",
  "Concentrating", "Focused", "Value", "Appreciate", "Likes", "Dislikes",
  "Liking", "Disliking", "Impressed", "Unimpressed", "Love*", "Approve*",
  "Adore*", "Imagin*", "Envision*", "Envisage", "Visualiz*", "Conceive*",
  "Predict*", "Infer*", "Attitude*", "Notion", "Opinion", "Perspective",
  "Sentiment", "Mind*"
)
Patient_emotion_dictionary_words <- c(
  "😱", "🥺", "😭", "😢", "☺️", "😂", "🥰", "😀", "😃", "😄", "😁", "😆",
  "🤣", "😊", "Relie*", "Comfortable", "Solace", "Sympath*", "Compassion",
  "Empath*", "Concern*", "Care*", "Caring"
)
Agent_emotion_dictionary_words <- c(
  "🤬", "😡", "😠", "😤", "🤭", "👊", "💪", "✊", "😲", "Loathing",
  "Hatred", "Detest*", "Inspiration", "Elevation",
  "Exalted", # was misspelled "Ealted"
  "Awe", "dazzl*"
)

# Turn each character vector into a named list in which every element is a
# single pattern whose name is that same pattern. setNames() returns the list
# with its names attribute set in one step.
Experience_dictionary_list <- setNames(
  as.list(Experience_dictionary_words),
  Experience_dictionary_words
)
Agency_dictionary_list <- setNames(
  as.list(Agency_dictionary_words),
  Agency_dictionary_words
)
Patient_emotion_dictionary_list <- setNames(
  as.list(Patient_emotion_dictionary_words),
  Patient_emotion_dictionary_words
)
Agent_emotion_dictionary_list <- setNames(
  as.list(Agent_emotion_dictionary_words),
  Agent_emotion_dictionary_words
)

# Build one dictionary per category from the named lists, so that every
# pattern becomes its own dictionary key.
Experience_dictionary <- dictionary(Experience_dictionary_list)
Agency_dictionary <- dictionary(Agency_dictionary_list)
Patient_emotion_dictionary <- dictionary(Patient_emotion_dictionary_list)
Agent_emotion_dictionary <- dictionary(Agent_emotion_dictionary_list)

# Look up the per-word dictionaries in d_dfm. Every dictionary entry is its
# own key, so each result holds one frequency column per entry.
# NOTE(review): d_dfm is built with stemming, so entries without a trailing
# "*" only match when they happen to equal a stem -- confirm this is intended.
dfm_results_Experience <- dfm_lookup(
  d_dfm,
  dictionary = Experience_dictionary,
  valuetype = "glob",
  case_insensitive = TRUE
)
dfm_results_Agency <- dfm_lookup(
  d_dfm,
  dictionary = Agency_dictionary,
  valuetype = "glob",
  case_insensitive = TRUE
)

# Total number of Experience hits in the data.
sum(dfm_results_Experience)

# Word clouds of the matched entries. max_words belongs to
# textplot_wordcloud(); dfm_trim() has no such parameter.
textplot_wordcloud(
  dfm_trim(dfm_results_Experience, min_termfreq = 3),
  max_words = 50,
  colors = c(max(2, ncol(dfm_results_Experience)), "black", "darkgrey"),
  font = NULL, random_order = FALSE, random_color = FALSE,
  comparison = TRUE, stem = TRUE, labelsize = 0.5
)
textplot_wordcloud(
  dfm_trim(dfm_results_Agency, min_termfreq = 1),
  max_words = 40,
  comparison = TRUE, color = c("darkgrey", "black")
)

textplot_wordcloud(
  dfm_trim(dfm_results_Agency, min_termfreq = 5),
  max_words = 40,
  colors = c(max(2, ncol(dfm_results_Agency)), "black", "grey60"),
  font = NULL, random_order = FALSE, random_color = FALSE,
  comparison = TRUE, stem = TRUE, labelsize = 0.5
)

# Total number of Agency hits. dfm_results_Agency is no longer overwritten
# with the Agent_emotion lookup at this point, so the name matches the data.
sum(dfm_results_Agency)
textplot_wordcloud(
  dfm_trim(dfm_results_Agency, min_termfreq = 1),
  max_words = 40,
  comparison = TRUE, stem = TRUE
)

dfm_results_Patient_emotion <- dfm_lookup(
  d_dfm,
  dictionary = Patient_emotion_dictionary,
  valuetype = "glob",
  case_insensitive = TRUE
)
sum(dfm_results_Patient_emotion)

# Agent_emotion results come from the Agent_emotion dictionary (this lookup
# previously used Agency_dictionary by mistake).
dfm_results_Agent_emotion <- dfm_lookup(
  d_dfm,
  dictionary = Agent_emotion_dictionary,
  valuetype = "glob",
  case_insensitive = TRUE
)
sum(dfm_results_Agent_emotion)

# Export the per-entry counts of every category to data frames
# (one row per document, one column per dictionary entry plus doc_id).
counts_Experience <- convert(dfm_results_Experience, to = "data.frame")
counts_Agency <- convert(dfm_results_Agency, to = "data.frame")
counts_Patient_emotion <- convert(dfm_results_Patient_emotion, to = "data.frame")
counts_Agent_emotion <- convert(dfm_results_Agent_emotion, to = "data.frame")

# Experience: use the document ids as row names and drop the id column so
# that only count columns remain.
rownames(counts_Experience) <- counts_Experience$doc_id
counts_Experience$doc_id <- NULL

counts_exp <- lapply(counts_Experience, as.numeric)
# counts_exp is a plain list, which colSums() rejects ("'x' must be an array
# of at least two dimensions"); sum every column of the list instead.
vapply(counts_exp, sum, numeric(1))
# max_words belongs to textplot_wordcloud(); dfm_trim() has no such parameter.
textplot_wordcloud(
  dfm_trim(dfm_results_Experience, min_termfreq = 1),
  max_words = 40,
  comparison = FALSE
)

# Agency: same preparation, then the total per dictionary entry.
rownames(counts_Agency) <- counts_Agency$doc_id
counts_Agency$doc_id <- NULL
colSums(counts_Agency)
textplot_wordcloud(
  dfm_trim(dfm_results_Agency, min_termfreq = 3),
  max_words = 50,
  comparison = FALSE
)

# Patient emotion totals per dictionary entry.
rownames(counts_Patient_emotion) <- counts_Patient_emotion$doc_id
counts_Patient_emotion$doc_id <- NULL
colSums(counts_Patient_emotion)

# Agent emotion totals per dictionary entry.
rownames(counts_Agent_emotion) <- counts_Agent_emotion$doc_id
counts_Agent_emotion$doc_id <- NULL
colSums(counts_Agent_emotion)

